# ==================================
#
#  Code for replicating:
# "Positioning Under Alternative Electoral Systems: Evidence From Japanese Candidate Election Manifestos"
#  Amy Catalinac, NYU
#
# ==================================






# ==================================
# Calculating mean district-level dispersion -- all cands prior to ER; LDP, DPJ, and NFP after ER
# (Figure 5 in Online Appendix)

# Load the district-level dispersion tables and compute, for each election
# year, the number of non-missing districts and the mean dispersion.
#
# Columns are renamed by position, so the column count is checked first: an
# unexpected extra column (e.g. an exported row index) would otherwise shift
# every year label silently instead of raising an error.
district.var.pre <- read.csv("dispersion_in_districts_preER_all.csv")
if (ncol(district.var.pre) != 4L) {
  stop("dispersion_in_districts_preER_all.csv must have exactly 4 columns ",
       "(ku, 1986, 1990, 1993)", call. = FALSE)
}
colnames(district.var.pre) <- c("ku", "1986", "1990", "1993")

district.var.post <- read.csv("dispersion_in_districts_postER_majorcands.csv")
if (ncol(district.var.post) != 6L) {
  stop("dispersion_in_districts_postER_majorcands.csv must have exactly 6 ",
       "columns (ku, 1996, 2000, 2003, 2005, 2009)", call. = FALSE)
}
colnames(district.var.post) <- c("ku", "1996", "2000", "2003", "2005", "2009")

# Number of non-missing observations per election year (printed, named by
# year). The first column ("ku") is dropped; every other column is one year.
c(
  vapply(district.var.pre[-1], function(x) sum(!is.na(x)), integer(1)),
  vapply(district.var.post[-1], function(x) sum(!is.na(x)), integer(1))
)

# Mean dispersion per election year, in the order 1986, 1990, 1993, 1996,
# 2000, 2003, 2005, 2009. Names are stripped so that later positional
# indexing and printing behave as with a plain numeric vector.
av.district.var <- unname(c(
  vapply(district.var.pre[-1], mean, numeric(1), na.rm = TRUE),
  vapply(district.var.post[-1], mean, numeric(1), na.rm = TRUE)
))

av.district.var

# Standard error of the mean of `x`: the square root of the sample variance
# divided by the number of elements. Missing values are not removed here
# (length(x) counts them and var(x) propagates them), so pass an NA-free
# vector.
se <- function(x) {
  n_obs <- length(x)
  sqrt(var(x) / n_obs)
}

# Standard error of the mean dispersion for each election year, in the order
# 1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009. Missing values are dropped
# from each column before se() is applied; the result is an unnamed numeric
# vector.
se.district.var <- unname(c(
  vapply(district.var.pre[c("1986", "1990", "1993")],
         function(disp) se(na.omit(disp)), numeric(1)),
  vapply(district.var.post[c("1996", "2000", "2003", "2005", "2009")],
         function(disp) se(na.omit(disp)), numeric(1))
))
se.district.var

# Confidence bounds (mean +/- 1.96 standard errors) for every election year,
# computed in one vectorised step. The multiplier is kept at 1.96 so the
# plotted intervals are unchanged.
z.crit <- 1.96
conf.upp <- av.district.var + z.crit * se.district.var
conf.low <- av.district.var - z.crit * se.district.var

years <- c(1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009)

# The means and standard errors are matched to `years` by position.
stopifnot(length(av.district.var) == length(years),
          length(se.district.var) == length(years))

# Upper and lower bound for every year (printed).
cbind(year = years, upper = conf.upp, lower = conf.low)

par(mfrow = c(1, 1), mar = c(4, 4, 2, 1), tcl = -0.25, mgp = c(1.75, 0.6, 0),
    font.main = 1, cex.main = 2)
plot(years, av.district.var, pch = 19, ylim = c(0, 2.7),
     ylab = "Mean District-Level Dispersion (Cands Aiming to Win)",
     xlab = "Year", xaxt = "n", cex.lab = 1.5)
axis(1, at = years, cex.axis = 1.5)
lines(years, av.district.var, lwd = 3)

# arrows() is vectorised over its coordinates: one call draws the flat-capped
# error bar (code = 3, angle = 90) for every year.
arrows(years, conf.upp, years, conf.low, code = 3, lwd = 1, angle = 90)

# Annotation arrow and label at 1996.
arrows(1996, 2.3, 1996, 1.2, lwd = 3, col = "black")
text(x = 2000.8, y = 2.4, labels = "First Election Under MMM", cex = 2,
     xpd = NA, pos = 2, col = "black")

# Remove only the objects this section created; rm(list = ls()) would also
# delete unrelated objects from the workspace of anyone sourcing the script.
rm(district.var.pre, district.var.post, av.district.var, se, se.district.var,
   z.crit, conf.upp, conf.low, years)






# ==================================
# Differences-in-means tests reported on page 10 of Appendix
# (sample: all cands prior to ER; LDP, DPJ, and NFP after ER)

# Load the district-level dispersion tables. Columns are renamed by position,
# so the column count is checked first: an unexpected extra column would
# otherwise shift every year label silently.
district.var.pre <- read.csv("dispersion_in_districts_preER_all.csv")
if (ncol(district.var.pre) != 4L) {
  stop("dispersion_in_districts_preER_all.csv must have exactly 4 columns ",
       "(ku, 1986, 1990, 1993)", call. = FALSE)
}
colnames(district.var.pre) <- c("ku", "1986", "1990", "1993")

district.var.post <- read.csv("dispersion_in_districts_postER_majorcands.csv")
if (ncol(district.var.post) != 6L) {
  stop("dispersion_in_districts_postER_majorcands.csv must have exactly 6 ",
       "columns (ku, 1996, 2000, 2003, 2005, 2009)", call. = FALSE)
}
colnames(district.var.post) <- c("ku", "1996", "2000", "2003", "2005", "2009")

# Pool the yearly columns into one vector per period and print the number of
# non-missing observations in each.
pre.ER <- unlist(district.var.pre[c("1986", "1990", "1993")],
                 use.names = FALSE)
sum(!is.na(pre.ER))
post.ER <- unlist(district.var.post[c("1996", "2000", "2003", "2005", "2009")],
                  use.names = FALSE)
sum(!is.na(post.ER))

# Two-sample t-tests (t.test() defaults to the unequal-variance Welch test).
# The calls are written exactly as before because the "data:" line of the
# printed result is taken from the call text.
t.test(na.omit(pre.ER), na.omit(post.ER))

t.test(na.omit(district.var.pre$"1993"), na.omit(district.var.post$"1996"))

# Remove only the objects this section created; rm(list = ls()) would also
# delete unrelated objects from the workspace of anyone sourcing the script.
rm(district.var.pre, district.var.post, pre.ER, post.ER)





# ==================================
# Analysis of mean district-level dispersion -- competitive candidates prior to ER; LDP, DPJ, and NFP after ER
# (Figure 6 in Online Appendix)

# Load the district-level dispersion tables and compute, for each election
# year, the number of non-missing districts and the mean dispersion.
#
# Columns are renamed by position, so the column count is checked first: an
# unexpected extra column (e.g. an exported row index) would otherwise shift
# every year label silently instead of raising an error.
district.var.pre <- read.csv("dispersion_in_districts_preER_compcands.csv")
if (ncol(district.var.pre) != 4L) {
  stop("dispersion_in_districts_preER_compcands.csv must have exactly 4 ",
       "columns (ku, 1986, 1990, 1993)", call. = FALSE)
}
colnames(district.var.pre) <- c("ku", "1986", "1990", "1993")

district.var.post <- read.csv("dispersion_in_districts_postER_majorcands.csv")
if (ncol(district.var.post) != 6L) {
  stop("dispersion_in_districts_postER_majorcands.csv must have exactly 6 ",
       "columns (ku, 1996, 2000, 2003, 2005, 2009)", call. = FALSE)
}
colnames(district.var.post) <- c("ku", "1996", "2000", "2003", "2005", "2009")

# Number of non-missing observations per election year (printed, named by
# year). The first column ("ku") is dropped; every other column is one year.
c(
  vapply(district.var.pre[-1], function(x) sum(!is.na(x)), integer(1)),
  vapply(district.var.post[-1], function(x) sum(!is.na(x)), integer(1))
)

# Mean dispersion per election year, in the order 1986, 1990, 1993, 1996,
# 2000, 2003, 2005, 2009. Names are stripped so that later positional
# indexing and printing behave as with a plain numeric vector.
av.district.var <- unname(c(
  vapply(district.var.pre[-1], mean, numeric(1), na.rm = TRUE),
  vapply(district.var.post[-1], mean, numeric(1), na.rm = TRUE)
))

av.district.var

# Standard error of the mean of `x`: the square root of the sample variance
# divided by the number of elements. Missing values are not removed here
# (length(x) counts them and var(x) propagates them), so pass an NA-free
# vector.
se <- function(x) {
  n_obs <- length(x)
  sqrt(var(x) / n_obs)
}

# Standard error of the mean dispersion for each election year, in the order
# 1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009. Missing values are dropped
# from each column before se() is applied; the result is an unnamed numeric
# vector.
se.district.var <- unname(c(
  vapply(district.var.pre[c("1986", "1990", "1993")],
         function(disp) se(na.omit(disp)), numeric(1)),
  vapply(district.var.post[c("1996", "2000", "2003", "2005", "2009")],
         function(disp) se(na.omit(disp)), numeric(1))
))
se.district.var

# Confidence bounds (mean +/- 1.96 standard errors) for every election year,
# computed in one vectorised step. The multiplier is kept at 1.96 so the
# plotted intervals are unchanged.
z.crit <- 1.96
conf.upp <- av.district.var + z.crit * se.district.var
conf.low <- av.district.var - z.crit * se.district.var

years <- c(1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009)

# The means and standard errors are matched to `years` by position.
stopifnot(length(av.district.var) == length(years),
          length(se.district.var) == length(years))

# Upper and lower bound for every year (printed).
cbind(year = years, upper = conf.upp, lower = conf.low)

par(mfrow = c(1, 1), mar = c(4, 4, 2, 1), tcl = -0.25, mgp = c(1.75, 0.6, 0),
    font.main = 1, cex.main = 2)
plot(years, av.district.var, pch = 19, ylim = c(0, 1.7),
     ylab = "Mean District-Level Dispersion (Competitive Cands)",
     xlab = "Year", xaxt = "n", cex.lab = 1.3)
axis(1, at = years, cex.axis = 1.3)
lines(years, av.district.var, lwd = 3)

# arrows() is vectorised over its coordinates: one call draws the flat-capped
# error bar (code = 3, angle = 90) for every year.
arrows(years, conf.upp, years, conf.low, code = 3, lwd = 1, angle = 90)

# Annotation arrow and label at 1996.
arrows(1996, 1.4, 1996, 0.6, lwd = 3, col = "black")
text(x = 2001.5, y = 1.5, labels = "First Election Under MMM", cex = 2,
     xpd = NA, pos = 2, col = "black")

# Remove only the objects this section created; rm(list = ls()) would also
# delete unrelated objects from the workspace of anyone sourcing the script.
rm(district.var.pre, district.var.post, av.district.var, se, se.district.var,
   z.crit, conf.upp, conf.low, years)









# ==================================
# Differences-in-means tests reported on page 11 of Appendix
# (sample: competitive candidates prior to ER)

# Load the district-level dispersion tables. Columns are renamed by position,
# so the column count is checked first: an unexpected extra column would
# otherwise shift every year label silently.
district.var.pre <- read.csv("dispersion_in_districts_preER_compcands.csv")
if (ncol(district.var.pre) != 4L) {
  stop("dispersion_in_districts_preER_compcands.csv must have exactly 4 ",
       "columns (ku, 1986, 1990, 1993)", call. = FALSE)
}
colnames(district.var.pre) <- c("ku", "1986", "1990", "1993")

district.var.post <- read.csv("dispersion_in_districts_postER_majorcands.csv")
if (ncol(district.var.post) != 6L) {
  stop("dispersion_in_districts_postER_majorcands.csv must have exactly 6 ",
       "columns (ku, 1996, 2000, 2003, 2005, 2009)", call. = FALSE)
}
colnames(district.var.post) <- c("ku", "1996", "2000", "2003", "2005", "2009")

# Pool the yearly columns into one vector per period and print the number of
# non-missing observations in each.
pre.ER <- unlist(district.var.pre[c("1986", "1990", "1993")],
                 use.names = FALSE)
sum(!is.na(pre.ER))

post.ER <- unlist(district.var.post[c("1996", "2000", "2003", "2005", "2009")],
                  use.names = FALSE)
sum(!is.na(post.ER))

# Two-sample t-tests (t.test() defaults to the unequal-variance Welch test).
# The calls are written exactly as before because the "data:" line of the
# printed result is taken from the call text.
t.test(na.omit(pre.ER), na.omit(post.ER))
t.test(na.omit(district.var.pre$"1993"), na.omit(district.var.post$"1996"))

# Remove only the objects this section created; rm(list = ls()) would also
# delete unrelated objects from the workspace of anyone sourcing the script.
rm(district.var.pre, district.var.post, pre.ER, post.ER)
